//CESCovid

// Close any log left open by an earlier (possibly aborted) run so that
// -log using- below cannot fail with "log file already open".
capture log close
log using CESCovid.log, replace

// New York Times data on daily Covid infections and deaths at US county level.
// Downloaded as an Excel file from
// https://github.com/nytimes/covid-19-data/blob/master/rolling-averages/us-counties-2020.csv
// Prior to this code, I matched the relevant countyfips code to the county name.
// -clear- lets the do-file be re-run even when another dataset is already in memory.
use "C:\Users\sbstjp\OneDrive - Cardiff University\us-counties-2020.dta", clear

// Prepare the NYT county-day series for the later merge:
//   1. convert the merge keys (date, countyfips) to numeric,
//   2. drop rows without a usable key and duplicate county-days,
//   3. compute percentage changes from calendar-based lags,
//   4. save the result under the file name used by the merge further down.
// The keys are converted and de-duplicated BEFORE any lag is taken: sorting on
// the raw string date would order rows lexically rather than chronologically,
// and duplicate rows would shift every positional lag.

* Convert date to a numeric Stata daily date (the raw variable is a string read as month/day/year)
gen numeric_date = date(date, "MDY")
format numeric_date %td
drop date
rename numeric_date date

* Convert countyfips to numeric
gen numeric_countyfips = real(countyfips)
drop countyfips
rename numeric_countyfips countyfips

// In this dataset, many observations lack a date. Rows without a county code
// or a date cannot be matched to a survey respondent, so they are deleted.
count if missing(countyfips) | missing(date)
drop if missing(countyfips) | missing(date)

* Report, then drop, repeated county-days so that (countyfips, date) uniquely identifies a row
duplicates report countyfips date
duplicates drop countyfips date, force

// Declare the panel so that L#. refers to the value # calendar days earlier.
// If that day is absent from the data the result is missing, rather than
// silently using whichever row happens to sit # positions above.
xtset countyfips date

// Create new variables which show percentage changes.
// A lagged value of zero (or missing) yields a missing percentage change.

* Daily percentage changes
gen daily_case_pct = ((cases_avg_per_100k - L1.cases_avg_per_100k)/L1.cases_avg_per_100k) * 100
gen daily_death_pct = ((deaths_avg_per_100k - L1.deaths_avg_per_100k)/L1.deaths_avg_per_100k) * 100

* Weekly percentage changes (7-day)
gen weekly_case_pct = ((cases_avg_per_100k - L7.cases_avg_per_100k)/L7.cases_avg_per_100k) * 100
gen weekly_death_pct = ((deaths_avg_per_100k - L7.deaths_avg_per_100k)/L7.deaths_avg_per_100k) * 100

* Monthly percentage changes (30-day)
gen monthly_case_pct = ((cases_avg_per_100k - L30.cases_avg_per_100k)/L30.cases_avg_per_100k) * 100
gen monthly_death_pct = ((deaths_avg_per_100k - L30.deaths_avg_per_100k)/L30.deaths_avg_per_100k) * 100

// Save the prepared county-day file; this is the dataset the merge below reads.
save "C:\Users\sbstjp\OneDrive - Cardiff University\NYTcovidcountydailyrates2020UPDATED.dta", replace

// Now use Cooperative Election Study Common Content, 2020. Schaffner, Brian; Ansolabehere, Stephen; Luks, Sam, 2021, "Cooperative Election Study Common Content, 2020", https://doi.org/10.7910/DVN/E9N6PH, Harvard Dataverse, V4, UNF:6:zWLoanzs2F3awt+875kWBg== [fileUNF]

// Load the CES survey data. -clear- discards whatever dataset is currently in
// memory; without it -use- aborts when that dataset has unsaved changes.
use "C:\Users\sbstjp\OneDrive - Cardiff University\CES20_Common_OUTPUT_vv.dta", clear

// Prepare for merge
* Convert countyfips to numeric so it has the same type as the key in the county-level file
gen numeric_countyfips = real(countyfips)
drop countyfips
rename numeric_countyfips countyfips

* Display with leading zeros (five-digit FIPS); this changes the display format only, not the stored value
format countyfips %05.0f

* Make a date-only variable from the interview start timestamp
* NOTE(review): dofc() expects a Stata datetime (%tc) value - confirm starttime is stored that way
gen date = dofc(starttime)
format date %td

// Merge: many respondents (master) to one county-day record (using)
merge m:1 countyfips date using "C:\Users\sbstjp\OneDrive - Cardiff University\NYTcovidcountydailyrates2020UPDATED.dta" 

// Give the survey item CC20_334d a descriptive name
rename CC20_334d defundthepolice

// Exploratory analysis
// Weighted pairwise correlations (with significance levels) between the
// survey item and the county-level Covid levels and percentage changes.
// Finding: little evidence for such a relationship.
pwcorr defundthepolice                          ///
    deaths_avg_per_100k cases_avg_per_100k      ///
    daily_case_pct daily_death_pct              ///
    weekly_case_pct weekly_death_pct            ///
    monthly_case_pct monthly_death_pct          ///
    [aweight=commonweight], sig

log close